/* 
    Purpose: Using the output from 1a_Census1960_fathers30to50.do,
    		    this file calculates average income at various levels of
    		    variation, separately for self-employed persons and not
    		    self-employed persons. The ratio of self-employed to not
    		    self-employed income (by level of variation)
    		    is then calculated.

    Note: The 1960 5% Census is big enough that it is not 
    	    necessary to weight averages. Perwt confirms that 
    	    everyone is assigned the same weight.

    Creates: All output files have the prefix "SelfEmploymentRatios_1960_"
*/
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

	use ./input/Census1960_5pct_fathers30to50_adjustments.dta, clear 

* All averages computed from this sample are unweighted, which is only valid
* if every person carries the same weight. Display the distribution of perwt
* and stop the script if it is not constant.
	sum perwt, d 
	assert r(min)==r(max)
	
* Restrict the sample to 1950 occupation codes 200 through 300 (both inclusive).
* NOTE(review): presumably this targets the managers/officials/proprietors
* group of OCC1950; confirm that the upper bound is meant to include code 300.
	keep if occ1950>=200 & occ1950<=300

* Tag self-employed (classwkr==1) and not self-employed (classwkr==2).
* The assert stops the script if any observation has another classwkr value.
	gen self_emp = classwkr==1
	gen non_self_emp = classwkr==2
	assert self_emp + non_self_emp ==1
	
/* Count # of self-employed (SE) and # of not self-employed (NSE)
   at each level of variation. The totals are stored on every
   observation of the corresponding cell. */
	local flag_SE "self_emp"
	local flag_NSE "non_self_emp"
	foreach grp in SE NSE {
		egen ncell_`grp'_all=total(`flag_`grp'')
		egen ncell_`grp'_race=total(`flag_`grp''), by(race)
		egen ncell_`grp'_race_south=total(`flag_`grp''), by(race south_merge)
		egen ncell_`grp'_race_south_edu=total(`flag_`grp''), by(race south_merge edu)
		egen ncell_`grp'_race_region=total(`flag_`grp''), by(race region_merge)
		egen ncell_`grp'_race_region_edu=total(`flag_`grp''), by(race region_merge edu)
	}
	
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*********************************************************************
*** AVERAGE INCOME (SELF-EMPLOYED AND NOT SELF-EMPLOYED) AND RATIOS
*********************************************************************

	// Snapshot of the tagged data. It is reloaded at the start of every pass
	// of the income loop because each pass ends by dropping most variables.
	tempfile fulldata
	save `fulldata'

	// Settings shared by every pass of the loops below.
	//   number_obs: a cell keeps its own average only if it contains at least
	//               this many self-employed AND this many not self-employed.
	//   measure:    egen function used to summarize income within a cell.
	local number_obs "20"
	local measure "mean"

	// A cell is "too small" when either its self-employed count or its
	// not self-employed count is below number_obs. The same condition is
	// applied to both groups so that numerator and denominator of each
	// ratio always come from the same level of aggregation.
	local small_south "(ncell_SE_race_south<`number_obs' | ncell_NSE_race_south<`number_obs')"
	local small_south_edu "(ncell_SE_race_south_edu<`number_obs' | ncell_NSE_race_south_edu<`number_obs')"
	local small_region "(ncell_SE_race_region<`number_obs' | ncell_NSE_race_region<`number_obs')"
	local small_region_edu "(ncell_SE_race_region_edu<`number_obs' | ncell_NSE_race_region_edu<`number_obs')"

foreach inc in person hh {

	use `fulldata', clear

	// Map the income concept to its variable. The macro is reset first and
	// checked afterwards: locals survive across loop passes, so an unmapped
	// label would otherwise silently reuse the variable of the previous pass.
	local income ""
	if "`inc'"=="person" local income "inctot"
	if "`inc'"=="fam" local income "ftotinc"
	if "`inc'"=="hh" local income "hh_income" 
	if "`income'"=="" {
		display as error "no income variable is mapped to income concept: `inc'"
		exit 198
	}

	// Same sequence of steps for the self-employed (SE) and the
	// not self-employed (NSE). In each step the *_aux variable holds the
	// statistic on the rows of the group only; the second egen copies that
	// value to every row of the cell.
	foreach grp in SE NSE {

		if "`grp'"=="SE" local flag "self_emp"
		if "`grp'"=="NSE" local flag "non_self_emp"

		// 0. Overall
		egen `grp'_income_all_aux=`measure'(`income') if `flag'==1
		egen `grp'_income_all=mean(`grp'_income_all_aux)

		// 1. By race
		egen `grp'_income_aux=`measure'(`income') if `flag'==1, by(race)
		egen `grp'_income=mean(`grp'_income_aux), by(race)

		// Note (original author): no cell is too small at this level
		tab ncell_SE_race 
		tab ncell_NSE_race

		// 2. Race x south
		egen `grp'_income_south_aux=`measure'(`income') if `flag'==1, by(race south_merge)
		egen `grp'_income_south=mean(`grp'_income_south_aux), by(race south_merge)

		tab ncell_SE_race_south
		tab ncell_NSE_race_south
		tab race south_merge if `small_south'

		// Too-small cells receive the national by-race average of the group
		replace `grp'_income_south = `grp'_income if `small_south'

		// 3. Race x south x education
		egen `grp'_income_south_edu_aux=`measure'(`income') if `flag'==1, by(race south_merge edu)
		egen `grp'_income_south_edu=mean(`grp'_income_south_edu_aux), by(race south_merge edu)

		tab ncell_SE_race_south_edu
		tab ncell_NSE_race_south_edu
		bysort south_merge: tab race edu if `small_south_edu'

		// Too-small cells receive the race x south value of the group
		// (which may itself already be the by-race fallback)
		replace `grp'_income_south_edu = `grp'_income_south if `small_south_edu'

		// 4. Race x region
		egen `grp'_income_region_aux=`measure'(`income') if `flag'==1, by(race region_merge)
		egen `grp'_income_region=mean(`grp'_income_region_aux), by(race region_merge)

		tab ncell_SE_race_region
		tab ncell_NSE_race_region
		tab race region_merge if `small_region'

		// Too-small cells receive the national by-race average of the group
		replace `grp'_income_region = `grp'_income if `small_region'

		// 5. Race x region x education
		egen `grp'_income_region_edu_aux=`measure'(`income') if `flag'==1, by(race region_merge edu)
		egen `grp'_income_region_edu=mean(`grp'_income_region_edu_aux), by(race region_merge edu)

		tab ncell_SE_race_region_edu
		tab ncell_NSE_race_region_edu
		bysort region_merge: tab race edu if `small_region_edu'

		// Too-small cells receive the race x region value of the group
		// (which may itself already be the by-race fallback)
		replace `grp'_income_region_edu = `grp'_income_region if `small_region_edu'

		// The helper variables are no longer needed
		drop *_aux
	}

	// Ratios of self-employed to not self-employed average income,
	// one per level of variation, suffixed with the income concept.
	gen ratio_self_emp_all_`inc'= SE_income_all / NSE_income_all
	gen ratio_self_emp_`inc'= SE_income / NSE_income
	gen ratio_self_emp_south_`inc'= SE_income_south / NSE_income_south
	gen ratio_self_emp_south_edu_`inc'= SE_income_south_edu / NSE_income_south_edu
	gen ratio_self_emp_region_`inc'= SE_income_region / NSE_income_region
	gen ratio_self_emp_region_edu_`inc'= SE_income_region_edu / NSE_income_region_edu

	// Keep the cell identifiers, the ratios and the person key (serial pernum)
	keep race edu region_merge south_merge ratio* serial pernum
	
	label var ratio_self_emp_all_`inc' "Ratio of self-emp to non-self emp (`inc') average income, all"
	label var ratio_self_emp_`inc' "Ratio of self-emp to non-self emp (`inc') average income, by race"
	label var ratio_self_emp_south_`inc' "Ratio of self-emp to non-self emp (`inc') average income, by race x south"
	label var ratio_self_emp_south_edu_`inc' "Ratio of self-emp to non-self emp (`inc') average income, by race x south x edu"
	label var ratio_self_emp_region_`inc' "Ratio of self-emp to non-self emp (`inc') average income, by race x region"
	label var ratio_self_emp_region_edu_`inc' "Ratio of self-emp to non-self emp (`inc') average income, by race x region x edu"

	// One temporary person-level file per income concept
	tempfile income_`inc'
	save `income_`inc''
	
	}
	
	// Put the person-income and household-income ratios side by side.
	// Both temporary files were built from the same observations, so every
	// record must match on serial pernum: assert(match) stops the script
	// otherwise, and nogenerate avoids creating _merge at all.
	use `income_person', clear
	merge 1:1 serial pernum using `income_hh', assert(match) nogenerate
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*********************
* SAVE (BY LEVEL)
*********************

	compress 

	// Every ratio is constant within its cell, so each output file keeps
	// the relevant columns and then a single row per cell.

	// Overall: one row, plus a constant variable number equal to 1
	preserve
	keep ratio_self_emp_all_person ratio_self_emp_all_hh
	drop if _n>1
	gen number=1
	save "./output/SelfEmploymentRatios_1960_all.dta", replace
	restore

	// By race
	preserve
	keep race ratio_self_emp_person ratio_self_emp_hh
	bysort race: drop if _n>1
	save "./output/SelfEmploymentRatios_1960_byrace.dta", replace
	restore

	// Race x south
	preserve
	keep race south_merge ratio_self_emp_south_person ratio_self_emp_south_hh
	bysort race south_merge: drop if _n>1
	save "./output/SelfEmploymentRatios_1960_byrace_bysouth.dta", replace
	restore

	// Race x south x education
	preserve
	keep race south_merge edu ratio_self_emp_south_edu_person ratio_self_emp_south_edu_hh
	bysort race south_merge edu: drop if _n>1
	save "./output/SelfEmploymentRatios_1960_byrace_bysouth_byedu.dta", replace
	restore

	// Race x south (version for Jácome et al survey respondents):
	// same content as the race x south file, with south_merge
	// renamed to south_merge_son
	preserve
	keep race south_merge ratio_self_emp_south_person ratio_self_emp_south_hh
	bysort race south_merge: drop if _n>1
	rename south_merge south_merge_son
	save "./output/SelfEmploymentRatios_1960_byrace_bysouth_R.dta", replace
	restore
	
	// Race x region
	preserve
	keep race region_merge ratio_self_emp_region_person ratio_self_emp_region_hh
	bysort race region_merge: drop if _n>1
	save "./output/SelfEmploymentRatios_1960_byrace_byregion.dta", replace
	restore

	// Race x region x education
	preserve
	keep race region_merge edu ratio_self_emp_region_edu_person ratio_self_emp_region_edu_hh
	bysort race region_merge edu: drop if _n>1
	save "./output/SelfEmploymentRatios_1960_byrace_byregion_byedu.dta", replace
	restore
